# Data source:
# https://www.kaggle.com/datasets/datasnaek/chess?resource=download
#
# Load the games table by building the full path to the CSV instead of
# calling setwd(), so the session's working directory is left untouched.
# NOTE(review): any other relative paths used with this script should be
# built from project_dir in the same way.
project_dir <- "~/Desktop/DSCI_304/Final_Project"
games <- read.csv(file.path(project_dir, "games.csv"))
#View(games)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Names of the 15 openings that occur in the most games, most frequent first
top_openings <- head(
  pull(count(games, opening_name, sort = TRUE), opening_name),
  15
)

# Keep only the games whose opening is one of those 15
games_top <- filter(games, opening_name %in% top_openings)

# One row per opening/winner pair: the number of games (n), the opening's
# overall game count (total), and that pair's share of the opening (pct)
summary_df <- games_top %>%
  count(opening_name, winner, name = "n") %>%
  group_by(opening_name) %>%
  mutate(
    total = sum(n),
    pct = n / total
  ) %>%
  ungroup()

# Interactive stacked bar chart: one bar per opening (ordered by descending
# total games), stacked by winner, with a custom hover label per segment
summary_df %>%
  mutate(
    opening_by_volume = reorder(opening_name, -total),
    hover_label = paste0(
      "Winner: ", winner, "<br>",
      "Games: ", n, "<br>",
      "Win %: ", scales::percent(pct, accuracy = 0.1)
    )
  ) %>%
  plot_ly(
    x = ~opening_by_volume,
    y = ~n,
    color = ~winner,
    text = ~hover_label,
    type = "bar",
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Number of Games by Opening and Winner",
    barmode = "stack",
    xaxis = list(title = "Opening Name"),
    yaxis = list(title = "Number of Games")
  )